/*******************************************************************************
																				
	DESCRIPTION: 	This do file creates ROC curves for particular models.	
	
*******************************************************************************/

* Reset Stata's memory before the script runs.
clear all
* Prefix placed at the start of every file name exported by this do file.
global id_code 107

* Program:
*
*   roc, model(string) year(string) outcome(string)
*
*   Draws and exports two ROC plots for the data currently in memory:
*     1. the ensemble prediction p_<outcome> alone;
*     2. the ensemble together with p_<outcome>_rf, p_<outcome>_boost and
*        p_<outcome>_lasso.
*
*   model() and year() are only used to build the exported file names.
*   Files are written to ${output} and prefixed with ${id_code}.
*   All work happens between preserve/restore, so the renames below do not
*   persist in the caller's data.
cap program drop roc
program roc 
		
	syntax, model(string) year(string) outcome(string)
		
	preserve
	
		local pred p_`outcome'
		
		* ---- Plot 1: ensemble prediction only --------------------------------
		rocreg `outcome' `pred', probit ml
		* The AUC is read positionally (row 1, column 3 of r(table)).
		* NOTE(review): this relies on the coefficient ordering produced by
		* rocreg for a single classifier -- confirm if the call above changes.
		matrix A = r(table)
		local auc = A[1, 3]
		noisily di "-----> Ensemble AUC: `: di %9.2f `auc''"

		rocregplot, plot1opts(msymbol(none) ///
		lcolor(orange_red)) rlopts(lpattern(dash) lcolor(gs12))  ///
		xscale(titlegap(2)) ylabel(,angle(0)) ///
		legend(off) ///
		graphregion(color(white)) plotregion(margin(zero)) name(roc1_`outcome', replace)
		
		graph export "${output}/${id_code}_ROCPlot_`outcome'_`year'_`model'_ensemble.pdf", as(pdf) replace		
		
		* ---- Plot 2: ensemble vs. individual models --------------------------
		* Shorter names for the individual-model predictions.
		rename `pred'_rf pred_rf
		rename `pred'_boost pred_boost
		rename `pred'_lasso pred_lasso
 		
		* The joint fit and its plot are best effort: a failure must not stop
		* the calling loop. The return code is kept so that a failure is
		* reported and, crucially, so that nothing is exported in that case.
		* Exporting unconditionally would write the current graph (plot 1)
		* under the "_ensemble_vs_other" file name.
		cap rocreg `outcome' pred_rf pred_boost pred_lasso `pred', probit ml
		local rc = _rc
		
		if `rc' == 0 {
			* Legend keys 5-8 are the keys used by the original script for the
			* four curves.
			cap rocregplot, plot1opts(msymbol(none) lcolor(ebblue)) plot2opts(msymbol(none) lcolor(green)) plot3opts(msymbol(none) lcolor(gold)) plot4opts(msymbol(none) lcolor(orange_red)) rlopts(lpattern(dash) lcolor(gs12)) ///
			xscale(titlegap(2)) ylabel(,angle(0)) ///
			legend(order(5 "R. Forest (AUC = `: di %4.3f _b[pred_rf:auc]')" 6 "B. Gradient (AUC = `: di %4.3f _b[pred_boost:auc]')" 7 "LASSO (AUC = `: di %4.3f _b[pred_lasso:auc]')" 8 "Ensemble (AUC = `: di %4.3f _b[`pred':auc]')") ring(0) pos(4) size(small)) ///
			graphregion(color(white)) plotregion(margin(zero))  name(roc2_`outcome', replace)
			local rc = _rc
		}
		
		if `rc' == 0 {
			graph export "${output}/${id_code}_ROCPlot_`outcome'_`year'_`model'_ensemble_vs_other.pdf", as(pdf) replace
		}
		else {
			noisily di as error "-----> ROC plot ensemble vs. other models failed for `outcome' (`model', `year'), r(`rc'); nothing exported"
		}

	restore

			
end

* Set outcomes:
global outcomes emplAft6M_0M_In emplAft6M_6M_In emplAft6M_12M_In 

* Set models and year span:
global models Full
global yearSpan 2006


* Loop: one pair of ROC plots per model x year x outcome.
foreach model in $models  {
		
	foreach year of numlist $yearSpan {	
		
		* The input file depends only on model and year, so it is loaded once
		* here instead of once per outcome. roc wraps its work in
		* preserve/restore, so the data in memory is unchanged between calls.
		use "${data}/003_MainWithEnsemblePred_`model'_`year'.dta", clear
		
		foreach outcome in $outcomes {
			
			roc, model(`model') outcome(`outcome') year(`year')
				
		}
			
	}
}

* ROC curves comparing the ensemble prediction with the linear model
* (model "Full", year 2006).
use "${data}/003_MainWithEnsemblePred_Full_2006.dta", clear

* Attach the linear-model predictions. assert(2 3) stops with an error if any
* observation in memory has no match in the using file; keep(3) retains only
* the matched observations.
merge 1:1 LopNr_PersonNr InLnr using "${data}/116_Linear_Predictions_Full_2006.dta", ///
	assert(2 3) keep(3) nogen

* Shorten the variable names so they can be indexed by horizon (0M, 6M, 12M).
rename emplAft6M_*M_In outcome_*M
rename p_emplAft6M_*M_In pred_*M
rename p_emplAft6M_*M_In_Linear pred_*M_Linear

foreach horizon in 0M 6M 12M {

	* Variables used for this horizon.
	local depvar   outcome_`horizon'
	local ensemble pred_`horizon'
	local linear   pred_`horizon'_Linear

	preserve

		* Restrict to observations where the outcome and both predictions
		* are observed.
		drop if missing(`depvar', `ensemble', `linear')

		* Fit both classifiers jointly.
		rocreg `depvar' `ensemble' `linear', probit ml

		* Formatted AUCs for the legend.
		local auc_ensemble : di %4.3f _b[`ensemble':auc]
		local auc_linear   : di %4.3f _b[`linear':auc]

		rocregplot, ///
			plot1opts(msymbol(none) lcolor(ebblue)) ///
			plot2opts(msymbol(none) lcolor(orange_red)) ///
			rlopts(lpattern(dash) lcolor(gs12)) ///
			xscale(titlegap(2)) ylabel(,angle(0)) ///
			legend(order(3 "Ensemble (AUC = `auc_ensemble')" 4 "Linear (AUC = `auc_linear')") ring(0) pos(4) size(small)) ///
			graphregion(color(white)) plotregion(margin(zero)) ///
			name(roc3_`horizon', replace)

		graph export "${output}/${id_code}_ROCPlot_`horizon'_2006_Full_ensemble_vs_Linear.pdf", as(pdf) replace

	restore

}

